###############################################################################
## AUTHOR: ALAN YAN
## DATE LAST UPDATED: 09/27/2020
## PURPOSE: HET EFFECTS VIA INTERACTION (KEY COMPONENT = INCOME CONTINUOUS)
###############################################################################
rm(list = ls())

#### NOTES ####

#(1) create a tidier function for random forest output

#### PACKAGES ####
library(pacman)
p_load(tidyverse,
       estimatr,
       broom,
       haven,
       hrbrthemes,
       cjoint,
       formula.tools, 
       DeclareDesign,
       emmeans,
       lmtest,
       cregg,
       Cairo)

#### FUNCTIONS ####

# ggplot theme
theme_shom_alt <- function (base_size = 11, waffle = FALSE) 
{
  ret <- theme_minimal(base_size = base_size, base_family = "Roboto Condensed") + 
    theme(plot.background = element_rect(fill = "#f5f5f2", 
                                         color = NA), 
          panel.background = element_rect(fill = "#f5f5f2",color = NA), 
          legend.background = element_rect(fill = "#f5f5f2",color = NA))
  if (waffle) {
    ret + theme(axis.text = element_blank())
  }
  else {
    ret
  }
}

# lm robust function that includes omitted categories for plotting
lm_robust_omitted <- function(formula, data, clusters) {
  require(formula.tools)
  require(tidyverse)
  require(haven)
  results <- lm_robust(formula = formula,
                       data = data,
                       clusters = clusters) %>%
    tidy() 
  omitted_cat <- model.frame(formula = formula, data = data) %>% 
    select(2:length(.)) %>% 
    sapply(., levels) %>% 
    lapply(., "[[", 1) %>% 
    unlist() %>%
    as.data.frame()
  omitted_cat_df <- cbind(paste0(rownames(omitted_cat), omitted_cat[,1]), 
                          0, 0, 0, 0, 0, 0, 0, 
                          lhs.vars(formula)) %>%
    as.data.frame() %>%
    zap_labels() %>%
    setNames(., names(results))
  results_with_omitted <- rbind(results, 
                                omitted_cat_df)
  results_with_omitted[,c(2:8)] <- sapply(results_with_omitted[,c(2:8)],as.numeric)
  return(results_with_omitted)
}

#### LOAD DATA ####
df <- read_rds("01-yougov-conjoint/01-data/cleaned/yougov-conjoint-cleaned.rds")

#### ANALYZE DATA (NO CONTROLS) ####
#Do people prefer firms with these features?
main_work_td <- lm_robust_omitted(work_binary ~ 
                                    r_income*as.factor(cje_corporate_gov) +
                                    r_income*as.factor(cje_corporate_resp) +
                                    r_income*as.factor(cje_firm_size) +
                                    r_income*as.factor(cje_political_donations) + 
                                    r_income*as.factor(cje_gender_ownership) + 
                                    r_income*as.factor(cje_healthcare) + 
                                    r_income*as.factor(cje_hours) + 
                                    r_income*as.factor(cje_training) + 
                                    r_income*as.factor(cje_location) + 
                                    r_income*as.factor(cje_sick_leave) + 
                                    r_income*as.factor(cje_parental_leave) + 
                                    r_income*as.factor(cje_race_ownership) + 
                                    r_income*as.factor(cje_retirement) + 
                                    r_income*as.factor(cje_income) +
                                    r_income*as.factor(cje_type_of_work) +
                                    r_income*as.factor(cje_union) +
                                    r_income*as.factor(cje_work_from_home) +
                                    r_income*as.factor(cje_team_work) +
                                    r_income*as.factor(cje_work_culture) +
                                    r_income,
                                  data = df,
                                  clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = factor(gsub(x = term,pattern = "as.factor",replacement = "")),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean) |
           term_clean == "r_income")

#Do people believe some firms are better at dealing with complaints?
main_complaints_td <- lm_robust_omitted(complaints_binary ~ 
                                          r_income*as.factor(cje_corporate_gov) +
                                          r_income*as.factor(cje_corporate_resp) +
                                          r_income*as.factor(cje_firm_size) +
                                          r_income*as.factor(cje_political_donations) + 
                                          r_income*as.factor(cje_gender_ownership) + 
                                          r_income*as.factor(cje_healthcare) + 
                                          r_income*as.factor(cje_hours) + 
                                          r_income*as.factor(cje_training) + 
                                          r_income*as.factor(cje_location) + 
                                          r_income*as.factor(cje_sick_leave) + 
                                          r_income*as.factor(cje_parental_leave) + 
                                          r_income*as.factor(cje_race_ownership) + 
                                          r_income*as.factor(cje_retirement) + 
                                          r_income*as.factor(cje_income) +
                                          r_income*as.factor(cje_type_of_work) +
                                          r_income*as.factor(cje_union) +
                                          r_income*as.factor(cje_work_from_home) +
                                          r_income*as.factor(cje_team_work) +
                                          r_income*as.factor(cje_work_culture) +
                                          r_income,
                                        data = df,
                                        clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean) |
           term_clean == "r_income")

#Do people believe they'll have more responsibilties at some firms?
main_responsibility_td <- lm_robust_omitted(responsibility_binary ~ 
                                              r_income*as.factor(cje_corporate_gov) +
                                              r_income*as.factor(cje_corporate_resp) +
                                              r_income*as.factor(cje_firm_size) +
                                              r_income*as.factor(cje_political_donations) + 
                                              r_income*as.factor(cje_gender_ownership) + 
                                              r_income*as.factor(cje_healthcare) + 
                                              r_income*as.factor(cje_hours) + 
                                              r_income*as.factor(cje_training) + 
                                              r_income*as.factor(cje_location) + 
                                              r_income*as.factor(cje_sick_leave) + 
                                              r_income*as.factor(cje_parental_leave) + 
                                              r_income*as.factor(cje_race_ownership) + 
                                              r_income*as.factor(cje_retirement) + 
                                              r_income*as.factor(cje_income) +
                                              r_income*as.factor(cje_type_of_work) +
                                              r_income*as.factor(cje_union) +
                                              r_income*as.factor(cje_work_from_home) +
                                              r_income*as.factor(cje_team_work) +
                                              r_income*as.factor(cje_work_culture) +
                                              r_income,
                                            data = df,
                                            clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean) |
           term_clean == "r_income")

#Do people believe they'll have more power at some firms?
main_power_td <- lm_robust_omitted(power_binary ~ 
                                     r_income*as.factor(cje_corporate_gov) +
                                     r_income*as.factor(cje_corporate_resp) +
                                     r_income*as.factor(cje_firm_size) +
                                     r_income*as.factor(cje_political_donations) + 
                                     r_income*as.factor(cje_gender_ownership) + 
                                     r_income*as.factor(cje_healthcare) + 
                                     r_income*as.factor(cje_hours) + 
                                     r_income*as.factor(cje_training) + 
                                     r_income*as.factor(cje_location) + 
                                     r_income*as.factor(cje_sick_leave) + 
                                     r_income*as.factor(cje_parental_leave) + 
                                     r_income*as.factor(cje_race_ownership) + 
                                     r_income*as.factor(cje_retirement) + 
                                     r_income*as.factor(cje_income) +
                                     r_income*as.factor(cje_type_of_work) +
                                     r_income*as.factor(cje_union) +
                                     r_income*as.factor(cje_work_from_home) +
                                     r_income*as.factor(cje_team_work) +
                                     r_income*as.factor(cje_work_culture) +
                                     r_income,
                                   data = df,
                                   clusters = df$rid
) %>%
  filter(term != "(Intercept)") %>%
  mutate(term_clean = gsub(x = term,pattern = "as.factor",replacement = ""),
         estimate = ifelse(is.na(estimate), 0, estimate),
         std.error = ifelse(is.na(std.error), 0, std.error),
         conf.low.90 = estimate - 1.64*std.error,
         conf.high.90 = estimate + 1.64*std.error) %>%
  filter(grepl("cje_corporate_gov",term_clean) |
           term_clean == "r_income")

#### PLOT JUST WORK AND POWER (FIGURE S11) ####

plot_tbl_all_effects <- bind_rows(main_work_td,
                                  main_power_td,
                                  main_responsibility_td,
                                  main_complaints_td) %>%
  mutate(outcome_clean = case_when(
    outcome == "work_binary" ~ "Prefer to Work",
    outcome == "power_binary" ~ "More Power",
    outcome == "responsibility_binary" ~ "More Responsibilities",
    outcome == "complaints_binary" ~ "Better Handle Complaints"
  )) %>%
  mutate(outcome_clean = fct_relevel(outcome_clean,"Prefer to Work",
                                     "More Power",
                                     "More Responsibilities",
                                     "Better Handle Complaints")) %>%
  mutate(term_clean = gsub(x = term_clean,pattern = "(cje_corporate_gov)",replacement = "",fixed = T),
         term_clean = gsub(x = term_clean, pattern = "r_income", replacement = "Income", fixed = TRUE),
         term_clean = gsub(x = term_clean, pattern = ":", replacement = " X ", fixed = TRUE)) %>%
  ggplot(aes(x = term_clean, y = estimate)) +
  facet_wrap(~outcome_clean) + 
  geom_linerange(aes(ymin = conf.low.90,ymax = conf.high.90),alpha = 0.7,
                 color = "indianred",size = 3) + #CIs around point estimates
  geom_linerange(aes(ymin = conf.low,ymax = conf.high),alpha = 0.3,
                 color = "indianred",size = 3) + 
  geom_hline(yintercept = 0,linetype = 'dashed',size = 1.5) + 
  geom_point(size = 3,shape = 21,fill = "white") +
  theme_shom_alt(base_size = 16) + 
  theme(plot.caption = element_text(size = 10,hjust = 1),
        plot.caption.position = "plot",
        strip.background = element_rect(fill="grey"),
        strip.text = element_text(color = 'black'),
        legend.position = "bottom",
        strip.text.x = element_text(size = 11),
        strip.text.y = element_text(size = 11)) + 
  labs(x = "Treatment",
       y = "Average Marginal Component Effect Relative to Baseline Condition",
       caption = "Notes: Estimates generated via Ordinary Least Squares with standard errors clustered at the respondent level. 
       The dark shaded region represents the 90% confidence interval while the light shaded region represents the 95% confidence interval.") + 
  coord_flip()
ggsave("01-yougov-conjoint/03-src/output/figures/hte-workplace-income-interaction.pdf",
         dpi = 320,width = 10,height = 6,device = cairo_pdf)
plot_tbl_all_effects
